From f8bc908ba31f43ac2c2fffe6e1e3fb068c69cc9a Mon Sep 17 00:00:00 2001 From: "kaf24@freefall.cl.cam.ac.uk" Date: Wed, 20 Oct 2004 14:44:56 +0000 Subject: [PATCH] bitkeeper revision 1.1159.1.244 (417679e8xXMjFVu9LO2SfkqXqR2RjA) Replace pseudo-4GB-segment instruction emulation with a segment-type trick plus instruction replay. Simpler and more robust but actually somewhat slower (we fault more times, as we can fault on both +ve and -ve accesses). --- .rootkeys | 2 +- xen/arch/x86/x86_32/emulate.c | 550 -------------------------------- xen/arch/x86/x86_32/seg_fixup.c | 488 ++++++++++++++++++++++++++++ xen/include/xen/perfc_defn.h | 2 +- 4 files changed, 490 insertions(+), 552 deletions(-) delete mode 100644 xen/arch/x86/x86_32/emulate.c create mode 100644 xen/arch/x86/x86_32/seg_fixup.c diff --git a/.rootkeys b/.rootkeys index 804e8a419d..7034cc7915 100644 --- a/.rootkeys +++ b/.rootkeys @@ -610,9 +610,9 @@ 3ddb79bccYVzXZJyVaxuv5T42Z1Fsw xen/arch/x86/trampoline.S 3ddb79bcOftONV9h4QCxXOfiT0h91w xen/arch/x86/traps.c 3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/x86/x86_32/domain_page.c -40f92331jfOlE7MfKwpdkEb1CEf23g xen/arch/x86/x86_32/emulate.c 3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/x86/x86_32/entry.S 3ddb79bcHwuCQDjBICDTSis52hWguw xen/arch/x86/x86_32/mm.c +40f92331jfOlE7MfKwpdkEb1CEf23g xen/arch/x86/x86_32/seg_fixup.c 3ddb79bc4nTpGQOe6_-MbyZzkhlhFQ xen/arch/x86/x86_32/usercopy.c 3ddb79bcOMCu9-5mKpjIh5d0qqBDPg xen/arch/x86/x86_32/xen.lds 40e96d3aLDI-nViMuYneD7VKYlZrVg xen/arch/x86/x86_64/entry.S diff --git a/xen/arch/x86/x86_32/emulate.c b/xen/arch/x86/x86_32/emulate.c deleted file mode 100644 index b2acb6783a..0000000000 --- a/xen/arch/x86/x86_32/emulate.c +++ /dev/null @@ -1,550 +0,0 @@ -/****************************************************************************** - * arch/x86/x86_32/emulate.c - * - * Emulation of certain classes of IA32 instruction. Used to emulate 4GB - * segments, for example. 
- * - * Copyright (c) 2004, K A Fraser - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* Make the scary benign errors go away. */ -#undef DPRINTK -#define DPRINTK(_f, _a...) ((void)0) - -/* - * Obtain the base and limit associated with the given segment selector. - * The selector must identify a 32-bit code or data segment. Any segment that - * appears to be truncated to not overlap with Xen is assumed to be a truncated - * 4GB segment, and the returned limit reflects this. - * @seg (IN) : Segment selector to decode. - * @base (OUT): Decoded linear base address. - * @limit (OUT): Decoded segment limit, in bytes. 0 == unlimited (4GB). - */ -int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit) -{ - struct domain *d = current; - unsigned long *table, a, b; - int ldt = !!(seg & 4); - int idx = (seg >> 3) & 8191; - - /* Get base and check limit. */ - if ( ldt ) - { - table = (unsigned long *)LDT_VIRT_START; - if ( idx >= d->mm.ldt_ents ) - goto fail; - } - else /* gdt */ - { - table = (unsigned long *)GET_GDT_ADDRESS(d); - if ( idx >= GET_GDT_ENTRIES(d) ) - goto fail; - } - - /* Grab the segment descriptor. 
*/ - if ( __get_user(a, &table[2*idx+0]) || - __get_user(b, &table[2*idx+1]) ) - goto fail; /* Barking up the wrong tree. Decode needs a page fault.*/ - - /* We only parse 32-bit code and data segments. */ - if ( (b & (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB)) != - (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB) ) - goto fail; - - /* Decode base and limit. */ - *base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16); - *limit = ((b & 0xf0000) | (a & 0x0ffff)) + 1; - if ( (b & _SEGMENT_G) ) - *limit <<= 12; - - /* - * Anything that looks like a truncated segment we assume ought really - * to be a 4GB segment. DANGER! - */ - if ( (PAGE_OFFSET - (*base + *limit)) < PAGE_SIZE ) - *limit = 0; - - return 1; - - fail: - return 0; -} - -/* Turn a segment+offset into a linear address. */ -int linearise_address(u16 seg, unsigned long off, unsigned long *linear) -{ - unsigned long base, limit; - - if ( !get_baselimit(seg, &base, &limit) ) - return 0; - - if ( off > (limit-1) ) - return 0; - - *linear = base + off; - - return 1; -} - -/* Decode Reg field of a ModRM byte: return a pointer into a register block. */ -void *decode_reg(struct pt_regs *regs, u8 b) -{ - switch ( b & 7 ) - { - case 0: return ®s->eax; - case 1: return ®s->ecx; - case 2: return ®s->edx; - case 3: return ®s->ebx; - case 4: return ®s->esp; - case 5: return ®s->ebp; - case 6: return ®s->esi; - case 7: return ®s->edi; - } - - return NULL; -} - -/* - * Decode an effective address: - * @ppb (IN/OUT): IN == address of ModR/M byte; OUT == byte following EA. - * @preg (OUT) : address in pt_regs block of the EA register parameter. - * @pmem (OUT) : address of the EA memory parameter. - * @pseg (IN) : address in pt_regs block of the override segment. - * @regs (IN) : addrress of the the pt_regs block. 
- */ -#define DECODE_EA_FAILED 0 -#define DECODE_EA_FIXME 1 -#define DECODE_EA_SUCCESS 2 -int decode_effective_address(u8 **ppb, void **preg, void **pmem, - unsigned int *pseg, struct pt_regs *regs) -{ - u8 modrm, mod, reg, rm, *pb = *ppb; - void *memreg, *regreg; - unsigned long ea, limit, offset; - u8 disp8; - u32 disp32 = 0; - - if ( get_user(modrm, pb) ) - { - DPRINTK("Fault while extracting modrm byte\n"); - return DECODE_EA_FAILED; - } - - pb++; - - mod = (modrm >> 6) & 3; - reg = (modrm >> 3) & 7; - rm = (modrm >> 0) & 7; - - if ( rm == 4 ) - { - DPRINTK("FIXME: Add decoding for the SIB byte.\n"); - return DECODE_EA_FIXME; - } - - /* Decode Reg and R/M fields. */ - regreg = decode_reg(regs, reg); - memreg = decode_reg(regs, rm); - - /* Decode Mod field. */ - switch ( modrm >> 6 ) - { - case 0: - if ( pseg == NULL ) - pseg = ®s->xds; - disp32 = 0; - if ( rm == 5 ) /* disp32 rather than (EBP) */ - { - memreg = NULL; - if ( get_user(disp32, (u32 *)pb) ) - { - DPRINTK("Fault while extracting .\n"); - return DECODE_EA_FAILED; - } - pb += 4; - } - break; - - case 1: - if ( pseg == NULL ) /* NB. EBP defaults to SS */ - pseg = (rm == 5) ? ®s->xss : ®s->xds; - if ( get_user(disp8, pb) ) - { - DPRINTK("Fault while extracting .\n"); - return DECODE_EA_FAILED; - } - pb++; - disp32 = (disp8 & 0x80) ? (disp8 | ~0xff) : disp8;; - break; - - case 2: - if ( pseg == NULL ) /* NB. EBP defaults to SS */ - pseg = (rm == 5) ? 
®s->xss : ®s->xds; - if ( get_user(disp32, (u32 *)pb) ) - { - DPRINTK("Fault while extracting .\n"); - return DECODE_EA_FAILED; - } - pb += 4; - break; - - case 3: - DPRINTK("Not a memory operand!\n"); - return DECODE_EA_FAILED; - } - - if ( !get_baselimit((u16)(*pseg), &ea, &limit) ) - return DECODE_EA_FAILED; - if ( limit != 0 ) - { - DPRINTK("Bailing: not a 4GB data segment.\n"); - return DECODE_EA_FAILED; - } - - offset = disp32; - if ( memreg != NULL ) - offset += *(u32 *)memreg; - if ( (offset & 0xf0000000) != 0xf0000000 ) - { - DPRINTK("Bailing: not a -ve offset into 4GB segment.\n"); - return DECODE_EA_FAILED; - } - - ea += offset; - if ( ea > (PAGE_OFFSET - PAGE_SIZE) ) - { - DPRINTK("!!!! DISALLOWING UNSAFE ACCESS !!!!\n"); - return DECODE_EA_FAILED; - } - - *ppb = pb; - *preg = regreg; - *pmem = (void *)ea; - - return DECODE_EA_SUCCESS; -} - -#define GET_IMM8 \ - if ( get_user(ib, (u8 *)pb) ) { \ - DPRINTK("Fault while extracting imm8\n"); \ - return 0; \ - } \ - pb += 1; -#define GET_IMM16 \ - if ( get_user(iw, (u8 *)pb) ) { \ - DPRINTK("Fault while extracting imm16\n"); \ - return 0; \ - } \ - pb += 2; -#define GET_IMM32 \ - if ( get_user(il, (u32 *)pb) ) { \ - DPRINTK("Fault while extracting imm32\n"); \ - return 0; \ - } \ - pb += 4; - -/* - * Called from the general-protection fault handler to attempt to decode - * and emulate an instruction that depends on 4GB segments. At this point - * we assume that the instruction itself is paged into memory (the CPU - * must have triggered this in order to decode the instruction itself). - */ -int gpf_emulate_4gb(struct pt_regs *regs) -{ - struct domain *d = current; - trap_info_t *ti; - struct guest_trap_bounce *gtb; - - u8 *eip; /* ptr to instruction start */ - u8 *pb, b; /* ptr into instr. / current instr. 
byte */ - u8 ib, mb, rb; /* byte operand from imm/register/memory */ - u16 iw, mw, rw; /* word operand from imm/register/memory */ - u32 il, ml, rl; /* long operand from imm/register/memory */ - void *reg, *mem; /* ptr to register/memory operand */ - unsigned int *pseg = NULL; /* segment for memory operand (NULL=default) */ - u32 eflags; - int opsz_override = 0; - - if ( !linearise_address((u16)regs->xcs, regs->eip, (unsigned long *)&eip) ) - { - DPRINTK("Cannot linearise %04x:%08lx\n", regs->xcs, regs->eip); - return 0; - } - - /* Parse prefix bytes. We're basically looking for segment override. */ - for ( pb = eip; (pb - eip) < 4; pb++ ) - { - if ( get_user(b, pb) ) - { - DPRINTK("Fault while accessing byte %d of instruction\n", pb-eip); - return 0; - } - - switch ( b ) - { - case 0xf0: /* LOCK */ - case 0xf2: /* REPNE/REPNZ */ - case 0xf3: /* REP/REPE/REPZ */ - case 0x67: /* Address-size override */ - DPRINTK("Unhandleable prefix byte %02x\n", b); - goto undecodeable; - case 0x66: /* Operand-size override */ - opsz_override = 1; - break; - case 0x2e: /* CS override */ - pseg = ®s->xcs; - break; - case 0x3e: /* DS override */ - pseg = ®s->xds; - break; - case 0x26: /* ES override */ - pseg = ®s->xes; - break; - case 0x64: /* FS override */ - pseg = ®s->xfs; - break; - case 0x65: /* GS override */ - pseg = ®s->xgs; - break; - case 0x36: /* SS override */ - pseg = ®s->xss; - break; - default: /* Not a prefix byte */ - goto done_prefix; - } - } - done_prefix: - - pb++; /* skip opcode byte */ - switch ( decode_effective_address(&pb, ®, &mem, pseg, regs) ) - { - case DECODE_EA_FAILED: - return 0; - case DECODE_EA_FIXME: - goto undecodeable; - } - - /* Only handle single-byte opcodes right now. Sufficient for MOV. */ - switch ( b ) - { - case 0x88: /* movb r,r/m */ - if ( __put_user(*(u8 *)reg, (u8 *)mem) ) - goto page_fault_w; - break; - case 0x89: /* movl r,r/m */ - if ( opsz_override ? 
__put_user(*(u16 *)reg, (u16 *)mem) - : __put_user(*(u32 *)reg, (u32 *)mem) ) - goto page_fault_w; - break; - case 0x8a: /* movb r/m,r */ - if ( __get_user(*(u8 *)reg, (u8 *)mem) ) - goto page_fault_r; - break; - case 0x8b: /* movl r/m,r */ - if ( opsz_override ? __get_user(*(u16 *)reg, (u16 *)mem) - : __get_user(*(u32 *)reg, (u32 *)mem) ) - goto page_fault_r; - break; - case 0xc6: /* movb imm,r/m */ - if ( reg != ®s->eax ) /* Reg == /0 */ - goto undecodeable; - GET_IMM8; - if ( __put_user(ib, (u8 *)mem) ) - goto page_fault_w; - break; - case 0xc7: /* movl imm,r/m */ - if ( reg != ®s->eax ) /* Reg == /0 */ - goto undecodeable; - if ( opsz_override ) - { - GET_IMM16; - if ( __put_user(iw, (u16 *)mem) ) - goto page_fault_w; - } - else - { - GET_IMM32; - if ( __put_user(il, (u32 *)mem) ) - goto page_fault_w; - } - break; - case 0x80: /* cmpb imm8,r/m */ - if ( reg != ®s->edi ) /* Reg == /7 */ - goto undecodeable; - GET_IMM8; - if ( __get_user(mb, (u8 *)mem) ) - goto page_fault_r; - __asm__ __volatile__ ( - "cmpb %b1,%b2 ; pushf ; popl %0" - : "=a" (eflags) - : "0" (ib), "b" (mb) ); - regs->eflags &= ~0x8d5; /* OF,SF,ZF,AF,PF,CF */ - regs->eflags |= eflags & 0x8d5; - break; - case 0x81: /* cmpl imm32,r/m */ - if ( reg != ®s->edi ) /* Reg == /7 */ - goto undecodeable; - if ( opsz_override ) - { - GET_IMM16; - if ( __get_user(mw, (u16 *)mem) ) - goto page_fault_r; - __asm__ __volatile__ ( - "cmpw %w1,%w2 ; pushf ; popl %0" - : "=a" (eflags) - : "0" (iw), "b" (mw) ); - } - else - { - GET_IMM32; - if ( __get_user(ml, (u32 *)mem) ) - goto page_fault_r; - __asm__ __volatile__ ( - "cmpl %1,%2 ; pushf ; popl %0" - : "=a" (eflags) - : "0" (il), "b" (ml) ); - } - regs->eflags &= ~0x8d5; /* OF,SF,ZF,AF,PF,CF */ - regs->eflags |= eflags & 0x8d5; - break; - case 0x83: /* cmpl imm8,r/m */ - if ( reg != ®s->edi ) /* Reg == /7 */ - goto undecodeable; - GET_IMM8; - if ( opsz_override ) - { - iw = (u16)(s16)(s8)ib; - if ( __get_user(mw, (u16 *)mem) ) - goto page_fault_r; - __asm__ 
__volatile__ ( - "cmpw %w1,%w2 ; pushf ; popl %0" - : "=a" (eflags) - : "0" (iw), "b" (mw) ); - } - else - { - il = (u32)(s32)(s8)ib; - if ( __get_user(ml, (u32 *)mem) ) - goto page_fault_r; - __asm__ __volatile__ ( - "cmpl %1,%2 ; pushf ; popl %0" - : "=a" (eflags) - : "0" (il), "b" (ml) ); - } - regs->eflags &= ~0x8d5; /* OF,SF,ZF,AF,PF,CF */ - regs->eflags |= eflags & 0x8d5; - break; - case 0x38: /* cmpb r,r/m */ - case 0x3a: /* cmpb r/m,r */ - rb = *(u8 *)reg; - if ( __get_user(mb, (u8 *)mem) ) - goto page_fault_r; - __asm__ __volatile__ ( - "cmpb %b1,%b2 ; pushf ; popl %0" - : "=a" (eflags) - : "0" ((b==0x38)?rb:mb), "b" ((b==0x38)?mb:rb) ); - regs->eflags &= ~0x8d5; /* OF,SF,ZF,AF,PF,CF */ - regs->eflags |= eflags & 0x8d5; - break; - case 0x39: /* cmpl r,r/m */ - case 0x3b: /* cmpl r/m,r */ - if ( opsz_override ) - { - rw = *(u16 *)reg; - if ( __get_user(mw, (u16 *)mem) ) - goto page_fault_r; - __asm__ __volatile__ ( - "cmpw %w1,%w2 ; pushf ; popl %0" - : "=a" (eflags) - : "0" ((b==0x38)?rw:mw), "b" ((b==0x38)?mw:rw) ); - } - else - { - rl = *(u32 *)reg; - if ( __get_user(ml, (u32 *)mem) ) - goto page_fault_r; - __asm__ __volatile__ ( - "cmpl %1,%2 ; pushf ; popl %0" - : "=a" (eflags) - : "0" ((b==0x38)?rl:ml), "b" ((b==0x38)?ml:rl) ); - } - regs->eflags &= ~0x8d5; /* OF,SF,ZF,AF,PF,CF */ - regs->eflags |= eflags & 0x8d5; - break; - default: - DPRINTK("Unhandleable opcode byte %02x\n", b); - goto undecodeable; - } - - /* Success! */ - perfc_incrc(emulations); - regs->eip += pb - eip; - - /* If requested, give a callback on otherwise unused vector 15. 
*/ - if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments_notify) ) - { - ti = &d->thread.traps[15]; - gtb = &guest_trap_bounce[d->processor]; - gtb->flags = GTBF_TRAP; - gtb->error_code = pb - eip; - gtb->cs = ti->cs; - gtb->eip = ti->address; - if ( TI_GET_IF(ti) ) - d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; - } - - return 1; - - undecodeable: - DPRINTK("Undecodable instruction %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x " - "caused GPF(0) at %04x:%08lx\n", - eip[0], eip[1], eip[2], eip[3], - eip[4], eip[5], eip[6], eip[7], - regs->xcs, regs->eip); - return 0; - - page_fault_w: - ti = &d->thread.traps[14]; - gtb = &guest_trap_bounce[d->processor]; - /* - * XXX We don't distinguish between page-not-present and read-only. - * Linux doesn't care, but this might need fixing if others do. - */ - gtb->error_code = 6; /* user fault, write access, page not present */ - goto page_fault_common; - page_fault_r: - ti = &d->thread.traps[14]; - gtb = &guest_trap_bounce[d->processor]; - gtb->error_code = 4; /* user fault, read access, page not present */ - page_fault_common: - gtb->flags = GTBF_TRAP_CR2; - gtb->cr2 = (unsigned long)mem; - gtb->cs = ti->cs; - gtb->eip = ti->address; - if ( TI_GET_IF(ti) ) - d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1; - return 1; -} diff --git a/xen/arch/x86/x86_32/seg_fixup.c b/xen/arch/x86/x86_32/seg_fixup.c new file mode 100644 index 0000000000..f79a01c27c --- /dev/null +++ b/xen/arch/x86/x86_32/seg_fixup.c @@ -0,0 +1,488 @@ +/****************************************************************************** + * arch/x86/x86_32/seg_fixup.c + * + * Support for -ve accesses to pseudo-4GB segments. + * + * Copyright (c) 2004, K A Fraser + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * NOTE(review): the header name after each #include below is missing in
+ * this copy of the patch (apparently stripped during extraction) --
+ * restore the eight header names from the original commit before applying.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* Make the scary benign errors go away. */
+#undef DPRINTK
+#define DPRINTK(_f, _a...) ((void)0)
+
+/*
+ * General instruction properties. Each insn_decode[] entry is a bitfield:
+ *  - bits 0-2 (INSN_SUFFIX_BYTES mask): number of immediate/offset bytes
+ *    that trail the opcode (and ModRM operand, if any);
+ *  - OPCODE_BYTE: the byte is an opcode the fixup code understands;
+ *  - HAS_MODRM:   the opcode is followed by a ModRM byte.
+ */
+#define INSN_SUFFIX_BYTES (7)
+#define OPCODE_BYTE       (1<<4)
+#define HAS_MODRM         (1<<5)
+
+/* Short forms for the table. */
+#define X  0 /* invalid for some random reason */
+#define O  OPCODE_BYTE
+#define M  HAS_MODRM
+
+/*
+ * Decode properties for every single-byte opcode, indexed by opcode value.
+ * An entry of X (zero) means "do not attempt a fixup for this opcode".
+ */
+static const unsigned char insn_decode[256] = {
+    /* 0x00 - 0x0F */
+    O|M, O|M, O|M, O|M, X, X, X, X,
+    O|M, O|M, O|M, O|M, X, X, X, X,
+    /* 0x10 - 0x1F */
+    O|M, O|M, O|M, O|M, X, X, X, X,
+    O|M, O|M, O|M, O|M, X, X, X, X,
+    /* 0x20 - 0x2F */
+    O|M, O|M, O|M, O|M, X, X, X, X,
+    O|M, O|M, O|M, O|M, X, X, X, X,
+    /* 0x30 - 0x3F */
+    O|M, O|M, O|M, O|M, X, X, X, X,
+    O|M, O|M, O|M, O|M, X, X, X, X,
+    /* 0x40 - 0x4F */
+    X, X, X, X, X, X, X, X,
+    X, X, X, X, X, X, X, X,
+    /* 0x50 - 0x5F */
+    X, X, X, X, X, X, X, X,
+    X, X, X, X, X, X, X, X,
+    /* 0x60 - 0x6F */
+    X, X, X, X, X, X, X, X,
+    X, O|M|4, X, O|M|1, X, X, X, X,
+    /* 0x70 - 0x7F */
+    X, X, X, X, X, X, X, X,
+    X, X, X, X, X, X, X, X,
+    /* 0x80 - 0x8F */
+    O|M|1, O|M|4, O|M|1, O|M|1, O|M, O|M, O|M, O|M,
+    O|M, O|M, O|M, O|M, O|M, O|M, O|M, X,
+    /* 0x90 - 0x9F */
+    X, X, X, X, X, X, X, X,
+    X, X, X, X, X, X, X, X,
+    /*
+     * 0xA0 - 0xAF
+     * A0-A3 are the MOV moffs forms. The trailing field is a memory
+     * offset whose width follows the address size (4 bytes here, since the
+     * address-size prefix is rejected), not the operand size -- so the
+     * byte-operand forms A0/A2 also carry a 4-byte offset.
+     */
+    O|4, O|4, O|4, O|4, X, X, X, X,
+    X, X, X, X, X, X, X, X,
+    /* 0xB0 - 0xBF */
+    X, X, X, X, X, X, X, X,
+    X, X, X, X, X, X, X, X,
+    /* 0xC0 - 0xCF */
+    O|M|1, O|M|1, X, X, X, X, O|M|1, O|M|4,
+    X, X, X, X, X, X, X, X,
+    /* 0xD0 - 0xDF */
+    O|M, O|M, O|M, O|M, X, X, X, X,
+    X, X, X, X, X, X, X, X,
+    /* 0xE0 - 0xEF */
+    X, X, X, X, X, X, X, X,
+    X, X, X, X, X, X, X, X,
+    /* 0xF0 - 0xFF */
+    X, X, X, X, X, X, X, X,
+    X, X, X, X, X, X, O|M, O|M
+};
+
+/*
+ * Obtain the base and limit associated with the given segment selector.
+ * The selector must identify a 32-bit code or data segment. Any segment that
+ * appears to be truncated to not overlap with Xen is assumed to be a truncated
+ * 4GB segment, and the returned limit reflects this.
+ *  @seg   (IN) : Segment selector to decode.
+ *  @base  (OUT): Decoded linear base address.
+ *  @limit (OUT): Decoded segment limit, in bytes. 0 == unlimited (4GB).
+ * Returns 1 on success; 0 if the selector is out of table range, the
+ * descriptor cannot be read, or it is not a present 32-bit code/data segment.
+ */
+int get_baselimit(u16 seg, unsigned long *base, unsigned long *limit)
+{
+    struct domain *d = current;
+    unsigned long *table, a, b;
+    int            ldt = !!(seg & 4);       /* selector TI bit: 1 == LDT */
+    int            idx = (seg >> 3) & 8191; /* descriptor index */
+
+    /* Get base and check limit. */
+    if ( ldt )
+    {
+        table = (unsigned long *)LDT_VIRT_START;
+        if ( idx >= d->mm.ldt_ents )
+            goto fail;
+    }
+    else /* gdt */
+    {
+        table = (unsigned long *)GET_GDT_ADDRESS(d);
+        if ( idx >= GET_GDT_ENTRIES(d) )
+            goto fail;
+    }
+
+    /* Grab the segment descriptor (a == low word, b == high word). */
+    if ( __get_user(a, &table[2*idx+0]) ||
+         __get_user(b, &table[2*idx+1]) )
+        goto fail; /* Barking up the wrong tree. Decode needs a page fault.*/
+
+    /* We only parse 32-bit code and data segments. */
+    if ( (b & (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB)) !=
+         (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB) )
+        goto fail;
+
+    /*
+     * Decode base and limit. The mask is built from an unsigned long
+     * constant: shifting the int 0xff left by 24 overflows a signed int.
+     */
+    *base  = (b&(0xffUL<<24)) | ((b&0xff)<<16) | (a>>16);
+    *limit = ((b & 0xf0000) | (a & 0x0ffff)) + 1;
+    if ( (b & _SEGMENT_G) )
+        *limit <<= 12;
+
+    /*
+     * Anything that looks like a truncated segment we assume ought really
+     * to be a 4GB segment. DANGER!
+     */
+    if ( (PAGE_OFFSET - (*base + *limit)) < PAGE_SIZE )
+        *limit = 0;
+
+    return 1;
+
+ fail:
+    return 0;
+}
+
+/* Turn a segment+offset into a linear address.
*/
+/*
+ *  @seg    (IN) : Segment selector, decoded with get_baselimit().
+ *  @off    (IN) : Offset within the segment.
+ *  @linear (OUT): base + off; written only on success.
+ * Returns 1 on success; 0 if the selector cannot be decoded or the offset
+ * exceeds the limit. A limit of 0 (4GB) accepts every offset, because
+ * (limit-1) then wraps to the largest unsigned long.
+ */
+int linearise_address(u16 seg, unsigned long off, unsigned long *linear)
+{
+    unsigned long base, limit;
+
+    if ( !get_baselimit(seg, &base, &limit) )
+        return 0;
+
+    if ( off > (limit-1) )
+        return 0;
+
+    *linear = base + off;
+
+    return 1;
+}
+
+/*
+ * Flip a pseudo-4GB data segment between expands-up and expands-down so
+ * that the access which just faulted will succeed when it is replayed.
+ *  @seg             (IN): Selector of the segment used by the access.
+ *  @positive_access (IN): Non-zero if the access used a +ve offset.
+ * Returns 1 if the descriptor was rewritten in place; 0 if the selector is
+ * out of range, unreadable, not a 32-bit page-granular DPL-3 data segment,
+ * or does not need flipping for this kind of access.
+ */
+int fixup_seg(u16 seg, int positive_access)
+{
+    struct domain *d = current;
+    unsigned long *table, a, b, base, limit;
+    int            ldt = !!(seg & 4);       /* selector TI bit: 1 == LDT */
+    int            idx = (seg >> 3) & 8191; /* descriptor index */
+
+    /* Get base and check limit. */
+    if ( ldt )
+    {
+        table = (unsigned long *)LDT_VIRT_START;
+        if ( idx >= d->mm.ldt_ents )
+        {
+            DPRINTK("Segment %04x out of LDT range (%d)\n",
+                    seg, d->mm.ldt_ents);
+            goto fail;
+        }
+    }
+    else /* gdt */
+    {
+        table = (unsigned long *)GET_GDT_ADDRESS(d);
+        if ( idx >= GET_GDT_ENTRIES(d) )
+        {
+            DPRINTK("Segment %04x out of GDT range (%d)\n",
+                    seg, GET_GDT_ENTRIES(d));
+            goto fail;
+        }
+    }
+
+    /* Grab the segment descriptor (a == low word, b == high word). */
+    if ( __get_user(a, &table[2*idx+0]) ||
+         __get_user(b, &table[2*idx+1]) )
+    {
+        DPRINTK("Fault while reading segment %04x\n", seg);
+        goto fail; /* Barking up the wrong tree. Decode needs a page fault.*/
+    }
+
+    /*
+     * We only parse 32-bit page-granularity non-privileged data segments.
+     * Bit 11 is included in the mask but not in the expected value, so it
+     * must be clear (a data, not code, segment).
+     */
+    if ( (b & (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB|
+               _SEGMENT_G|(1<<11)|_SEGMENT_DPL)) !=
+         (_SEGMENT_P|_SEGMENT_S|_SEGMENT_DB|_SEGMENT_G|_SEGMENT_DPL) )
+    {
+        DPRINTK("Bad segment %08lx:%08lx\n", a, b);
+        goto fail;
+    }
+
+    /*
+     * Decode base and limit. The mask is built from an unsigned long
+     * constant: shifting the int 0xff left by 24 overflows a signed int.
+     */
+    base  = (b&(0xffUL<<24)) | ((b&0xff)<<16) | (a>>16);
+    limit = (((b & 0xf0000) | (a & 0x0ffff)) + 1) << 12;
+
+    /* Bit 10 of the high word selects expands-down. */
+    if ( b & (1 << 10) )
+    {
+        /* Expands-down: All the way to zero? Assume 4GB if so. */
+        if ( ((base + limit) < PAGE_SIZE) && positive_access )
+        {
+            /* Flip to expands-up. */
+            limit >>= 12;
+            limit -= (-PAGE_OFFSET/PAGE_SIZE) + 2;
+            goto flip;
+        }
+    }
+    else
+    {
+        /* Expands-up: All the way to Xen space? Assume 4GB if so. */
+        if ( ((PAGE_OFFSET - (base + limit)) < PAGE_SIZE) && !positive_access )
+        {
+            /* Flip to expands-down. */
+            limit >>= 12;
+            limit += (-PAGE_OFFSET/PAGE_SIZE) + 0;
+            goto flip;
+        }
+    }
+
+    DPRINTK("None of the above! (%08lx:%08lx, %d, %08lx, %08lx, %08lx)\n",
+            a, b, positive_access, base, limit, base+limit);
+
+ fail:
+    return 0;
+
+ flip:
+    /* Store the new 20-bit limit and toggle the expand-direction bit. */
+    a &= ~0x0ffff; a |= limit & 0x0ffff;
+    b &= ~0xf0000; b |= limit & 0xf0000;
+    b ^= 1 << 10;
+    /* NB. These can't fault. Checked readable above; must also be writable. */
+    table[2*idx+0] = a;
+    table[2*idx+1] = b;
+    return 1;
+}
+
+/* Decode Reg field of a ModRM byte: return a pointer into a register block. */
+void *decode_reg(struct pt_regs *regs, u8 b)
+{
+    switch ( b & 7 )
+    {
+    case 0: return &regs->eax;
+    case 1: return &regs->ecx;
+    case 2: return &regs->edx;
+    case 3: return &regs->ebx;
+    case 4: return &regs->esp;
+    case 5: return &regs->ebp;
+    case 6: return &regs->esi;
+    case 7: return &regs->edi;
+    }
+
+    return NULL;
+}
+
+/*
+ * Called from the general-protection fault handler to attempt to decode
+ * an instruction that depends on 4GB segments. The instruction is not
+ * emulated: only its memory operand is decoded, far enough to learn which
+ * segment it uses and whether the offset is +ve or -ve. That segment is then
+ * flipped by fixup_seg() so that the instruction can be replayed. At this
+ * point we assume that the instruction itself is paged into memory (the CPU
+ * must have triggered this in order to decode the instruction itself).
+ * Returns 1 if a segment was fixed up, 0 if the fault is not ours to handle.
+ */
+int gpf_emulate_4gb(struct pt_regs *regs)
+{
+    struct domain *d = current;
+    trap_info_t   *ti;
+    struct guest_trap_bounce *gtb;
+    u8            modrm, mod, rm, decode;
+    void         *memreg;
+    unsigned long offset;
+    u8            disp8;
+    u32           disp32 = 0;
+    u8           *eip;         /* ptr to instruction start */
+    u8           *pb, b;       /* ptr into instr. / current instr. byte */
+    unsigned int *pseg = NULL; /* segment for memory operand (NULL=default) */
+
+    /* WARNING: We only work for ring-3 segments. */
+    if ( unlikely((regs->xcs & 3) != 3) )
+    {
+        DPRINTK("Taken fault at bad CS %04x\n", regs->xcs);
+        goto fail;
+    }
+
+    if ( !linearise_address((u16)regs->xcs, regs->eip, (unsigned long *)&eip) )
+    {
+        DPRINTK("Cannot linearise %04x:%08lx\n", regs->xcs, regs->eip);
+        goto fail;
+    }
+
+    /* Parse prefix bytes. We're basically looking for segment override. */
+    for ( pb = eip; ; pb++ )
+    {
+        if ( get_user(b, pb) )
+        {
+            DPRINTK("Fault while accessing byte %d of instruction\n", pb-eip);
+            goto fail;
+        }
+
+        /* At most four prefix bytes: the fifth byte is taken as the opcode. */
+        if ( (pb - eip) == 4 )
+            break;
+
+        switch ( b )
+        {
+        case 0xf0: /* LOCK */
+        case 0xf2: /* REPNE/REPNZ */
+        case 0xf3: /* REP/REPE/REPZ */
+        case 0x67: /* Address-size override */
+            DPRINTK("Unhandleable prefix byte %02x\n", b);
+            goto fixme;
+        case 0x66: /* Operand-size override */
+            break;
+        case 0x2e: /* CS override */
+            pseg = &regs->xcs;
+            break;
+        case 0x3e: /* DS override */
+            pseg = &regs->xds;
+            break;
+        case 0x26: /* ES override */
+            pseg = &regs->xes;
+            break;
+        case 0x64: /* FS override */
+            pseg = &regs->xfs;
+            break;
+        case 0x65: /* GS override */
+            pseg = &regs->xgs;
+            break;
+        case 0x36: /* SS override */
+            pseg = &regs->xss;
+            break;
+        default: /* Not a prefix byte */
+            goto done_prefix;
+        }
+    }
+ done_prefix:
+
+    decode = insn_decode[b]; /* opcode byte */
+    pb++;
+    if ( decode == 0 )
+    {
+        DPRINTK("Unsupported opcode %02x\n", b);
+        goto fail;
+    }
+
+    if ( !(decode & HAS_MODRM) )
+    {
+        /*
+         * Direct-offset operand. Without an override prefix the operand
+         * lives in DS; pseg must not reach skip_modrm as NULL.
+         */
+        if ( pseg == NULL )
+            pseg = &regs->xds;
+
+        /*
+         * Fetch the offset with get_user(), like every other instruction
+         * byte: the guest page may fault. Step pb over it, as the ModRM
+         * path does for its displacement.
+         */
+        switch ( decode & INSN_SUFFIX_BYTES )
+        {
+        case 1:
+            if ( get_user(disp8, pb) )
+            {
+                DPRINTK("Fault while extracting <disp8>.\n");
+                goto fail;
+            }
+            pb++;
+            offset = (long)(signed char)disp8; /* sign-extend */
+            goto skip_modrm;
+        case 4:
+            if ( get_user(disp32, (u32 *)pb) )
+            {
+                DPRINTK("Fault while extracting <disp32>.\n");
+                goto fail;
+            }
+            pb += 4;
+            offset = disp32;
+            goto skip_modrm;
+        default:
+            goto fail;
+        }
+    }
+
+    /*
+     * Mod/RM processing.
+     */
+
+    if ( get_user(modrm, pb) )
+    {
+        DPRINTK("Fault while extracting modrm byte\n");
+        goto fail;
+    }
+
+    pb++;
+
+    mod = (modrm >> 6) & 3;
+    rm  = (modrm >> 0) & 7;
+
+    if ( rm == 4 )
+    {
+        DPRINTK("FIXME: Add decoding for the SIB byte.\n");
+        goto fixme;
+    }
+
+    /* Decode R/M field: the register that supplies the base address. */
+    memreg = decode_reg(regs, rm);
+
+    /* Decode Mod field. */
+    switch ( mod )
+    {
+    case 0:
+        if ( pseg == NULL )
+            pseg = &regs->xds;
+        disp32 = 0;
+        if ( rm == 5 ) /* disp32 rather than (EBP) */
+        {
+            memreg = NULL;
+            if ( get_user(disp32, (u32 *)pb) )
+            {
+                DPRINTK("Fault while extracting <disp32>.\n");
+                goto fail;
+            }
+            pb += 4;
+        }
+        break;
+
+    case 1:
+        if ( pseg == NULL ) /* NB. EBP defaults to SS */
+            pseg = (rm == 5) ? &regs->xss : &regs->xds;
+        if ( get_user(disp8, pb) )
+        {
+            DPRINTK("Fault while extracting <disp8>.\n");
+            goto fail;
+        }
+        pb++;
+        disp32 = (disp8 & 0x80) ? (disp8 | ~0xff) : disp8; /* sign-extend */
+        break;
+
+    case 2:
+        if ( pseg == NULL ) /* NB. EBP defaults to SS */
+            pseg = (rm == 5) ? &regs->xss : &regs->xds;
+        if ( get_user(disp32, (u32 *)pb) )
+        {
+            DPRINTK("Fault while extracting <disp32>.\n");
+            goto fail;
+        }
+        pb += 4;
+        break;
+
+    case 3:
+        DPRINTK("Not a memory operand!\n");
+        goto fail;
+    }
+
+    offset = disp32;
+    if ( memreg != NULL )
+        offset += *(u32 *)memreg;
+
+ skip_modrm:
+    /* The sign of the offset tells fixup_seg() which way to flip. */
+    if ( !fixup_seg((u16)(*pseg), (signed long)offset >= 0) )
+        goto fail;
+
+    /* Success! */
+    perfc_incrc(seg_fixups);
+
+    /* If requested, give a callback on otherwise unused vector 15. */
+    if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments_notify) )
+    {
+        ti  = &d->thread.traps[15];
+        gtb = &guest_trap_bounce[d->processor];
+        gtb->flags      = GTBF_TRAP;
+        gtb->error_code = pb - eip; /* number of instruction bytes decoded */
+        gtb->cs         = ti->cs;
+        gtb->eip        = ti->address;
+        if ( TI_GET_IF(ti) )
+            d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
+    }
+
+    return 1;
+
+ fixme:
+    DPRINTK("Undecodable instruction %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x "
+            "caused GPF(0) at %04x:%08lx\n",
+            eip[0], eip[1], eip[2], eip[3],
+            eip[4], eip[5], eip[6], eip[7],
+            regs->xcs, regs->eip);
+ fail:
+    return 0;
+}
diff --git a/xen/include/xen/perfc_defn.h b/xen/include/xen/perfc_defn.h
index 4868d2a87c..f6868d78b9 100644
--- a/xen/include/xen/perfc_defn.h
+++ b/xen/include/xen/perfc_defn.h
@@ -1,5 +1,5 @@
-PERFCOUNTER_CPU (emulations, "instructions emulated" )
+PERFCOUNTER_CPU (seg_fixups, "segmentation fixups" )
 PERFCOUNTER_CPU( irqs, "#interrupts" )
 PERFCOUNTER_CPU( ipis, "#IPIs" )
-- 
2.30.2